home *** CD-ROM | disk | FTP | other *** search
Text File | 2000-10-06 | 11.7 KB | 427 lines | [TEXT/CWIE] |
- ///--------------------------------------------------------------------------------------
- // BlitPixieDoubled
- // a fast pixel-doubling blitter
- //
- // written by Anders F Björklund <afb@algonet.se>
- // ©1999 afb.
- ///--------------------------------------------------------------------------------------
-
- #ifndef __BLITPIXIE__
- #include "BlitPixieHeader.h"
- #endif
-
- #include "BlitPixieAsm.h"
-
- #pragma mark *** PowerPC asm:
- #if USE_PPC_ASSEMBLY
-
- ASM_FUNC void BlitPixieDoubled8Bit( // PowerPC version: blit 8-bit pixels at 2x size, each source pixel becomes a 2x2 square
- register unsigned char *source, register unsigned char *destination, // first source pixel / first destination pixel
- register unsigned long srcRowBytes, register unsigned long dstRowBytes, // byte pitch of one source row / one destination row
- register unsigned short width, register unsigned short height) // source size in pixels; pixels are consumed four at a time
- {
- #define r_src r3
- #define r_dst r4
- #define r_srcRowBytes r5
- #define r_dstRowBytes r6
- #define r_width r7
- #define r_height r8
- // the names above alias r3..r8 in parameter order; the ones below are scratch
- #define r_dst2 r9
- #define r_temp1 r10
- #define r_temp2 r11
-
- ASM_BEGIN
- // setup: bias the pointers for the update-form load/store and turn rowbytes into end-of-row strides
- subi r_dst,r_dst,8 // stfdu adds 8 before every store
- add r_dst2,r_dst,r_dstRowBytes // dst2 = the destination row directly below dst
- sub r_srcRowBytes,r_srcRowBytes,r_width // subtract width from rowbytes for stride
- sub r_dstRowBytes,r_dstRowBytes,r_width // subtract width from rowbytes for stride
- add r_dstRowBytes,r_dstRowBytes,r_dstRowBytes // doubled: 2*width bytes written per row, two rows per source row
- rlwinm r_width,r_width,32-2,2,31 // width / 4 = number of 32-bit words per source row
- subi r_src,r_src,4 // lwzu adds 4 before every load
- subi r_width,r_width,1 // the first word of a row is loaded ahead of the inner loop
-
- @rowloop:
- lwzu r0,4(r_src) // load 4 pixels into r0
-
- /* Pixel building process:
- r0: ABCD
- r10: A*** AAB* AABB AABB****
- r11: ***D *CDD CCDD ****CCDD
- fp0: AABBCCDD
- */
-
- mr r10,r0 // put copy in r10
- mr r11,r0 // and in r11
-
- rlwimi r10,r0,24,8,23 // copy upper 16 bits to middle of r10
- rlwimi r11,r0,8,8,23 // copy lower 16 bits to middle of r11
-
- rlwimi r10,r0,16,24,31 // get remaining bits into r10
- stw r10,-8(SP) // store upper 4 pixels into part of double
-
- rlwimi r11,r0,16,0,7 // get remaining bits into r11
- stw r11,-4(SP) // store lower 4 pixels into part of double
- // NOTE(review): the counter is width/4 - 1, which is 0 for widths 4..7 and wraps for widths below 4; bdnz decrements before testing, so confirm callers always pass width >= 8
- mtctr r_width // copy width into counter
- lfd fp0,-8(SP) // load double
- // inner loop: store the double built in the previous step while building the next one
- @loop:
- lwzu r0,4(r_src) // load next 4 pixels into r0
- stfdu fp0,8(r_dst) // store a double from before
-
- // (same as above)
- mr r10,r0 // put copy in r10
- mr r11,r0 // and in r11
- rlwimi r10,r0,24,8,23 // copy upper 16 bits to middle of r10
- rlwimi r11,r0,8,8,23 // copy lower 16 bits to middle of r11
- rlwimi r10,r0,16,24,31 // get remaining bits into r10
- stw r10,-8(SP) // store upper 4 pixels into part of double
- rlwimi r11,r0,16,0,7 // get remaining bits into r11
- stw r11,-4(SP) // store lower 4 pixels into part of double
-
- stfdu fp0,8(r_dst2) // store a double from before
- lfd fp0,-8(SP) // load double
-
- bdnz+ @loop // loop over all x
- // the last double of the row is still pending in fp0: flush it to both output rows
- stfdu fp0,8(r_dst)
-
- subic. r_height,r_height,1 // one source row done; the record form sets the condition tested by bne below
- add r_src,r_src,r_srcRowBytes // skip the source row padding
- add r_dst,r_dst,r_dstRowBytes // advance two destination rows
-
- stfdu fp0,8(r_dst2) // store a double from before
- add r_dst2,r_dst2,r_dstRowBytes // keep dst2 one row below dst
-
- bne @rowloop // loop over all y
-
- ASM_END
- }
-
- ASM_FUNC void BlitPixieDoubled16Bit(
- register unsigned short *source, register unsigned short *destination,
- register unsigned long srcRowBytes, register unsigned long dstRowBytes,
- register unsigned short width, register unsigned short height)
- {
- #define r_src r3
- #define r_dst r4
- #define r_srcRowBytes r5
- #define r_dstRowBytes r6
- #define r_width r7
- #define r_height r8
-
- #define r_dst2 r9
- #define r_temp1 r10
- #define r_temp2 r11
-
- ASM_BEGIN
-
- subi r_dst,r_dst,8
- add r_dst2,r_dst,r_dstRowBytes
- sub r_srcRowBytes,r_srcRowBytes,r_width // subtract 2*width from rowbytes for stride
- sub r_srcRowBytes,r_srcRowBytes,r_width
- sub r_dstRowBytes,r_dstRowBytes,r_width // subtract 2*width from rowbytes for stride
- sub r_srcRowBytes,r_srcRowBytes,r_width
- add r_dstRowBytes,r_dstRowBytes,r_dstRowBytes
- rlwinm r_width,r_width,32-2,2,31
- subi r_src,r_src,4
- subi r_width,r_width,1
-
- @rowloop:
- lwzu r0,4(r_src) // load 2 pixels into r0
-
- /* Pixel building process:
- r0: AABB
- r10: AA** AAAA AAAA****
- r11: **BB BBBB ****BBBB
- fp0: AAAABBBB
- */
-
- mr r10,r0 // put copy in r10
- mr r11,r0 // and in r11
-
- rlwimi r10,r0,16,16,31 // copy upper 16 bits to lower of r10
- rlwimi r11,r0,16,0,15 // copy lower 16 bits to upper of r11
-
- stw r10,-8(SP) // store upper 4 pixels into part of double
- stw r11,-4(SP) // store lower 4 pixels into part of double
-
- mtctr r_width // copy width into counter
- lfd fp0,-8(SP) // load double
-
- @loop:
- lwzu r0,4(r_src) // load 2 pixels into r10
- stfdu fp0,8(r_dst) // store a double from before
-
- // (same as above)
- mr r10,r0 // put copy in r10
- mr r11,r0 // and in r11
- rlwimi r10,r0,16,16,31 // copy upper 16 bits to lower of r10
- rlwimi r11,r0,16,0,15 // copy lower 16 bits to upper of r11
- stw r10,-8(SP) // store upper 4 pixels into part of double
- stw r11,-4(SP) // store lower 4 pixels into part of double
-
- stfdu fp0,8(r_dst2) // store a double from before
- lfd fp0,-8(SP) // load double
-
- bdnz+ @loop // loop over all x
-
- stfdu fp0,8(r_dst)
-
- subic. r_height,r_height,1
- add r_src,r_src,r_srcRowBytes
- add r_dst,r_dst,r_dstRowBytes
-
- stfdu fp0,8(r_dst2) // store a double from before
- add r_dst2,r_dst2,r_dstRowBytes
-
- bne @rowloop // loop over all y
-
- ASM_END
- }
-
- #pragma mark *** 680x0 asm:
- #elif USE_68K_ASSEMBLY
-
- ASM_FUNC void BlitPixieDoubled8Bit( /* 680x0 version: blit 8-bit pixels at 2x size, each source pixel becomes a 2x2 square */
- unsigned char *source, unsigned char *destination, /* first source pixel / first destination pixel */
- unsigned long srcBytes, unsigned long dstBytes, /* byte pitch of one source row / one destination row */
- unsigned short width, unsigned short height ) /* source size in pixels; width is rounded down to a multiple of 4 below */
- {
- #define A_src A0
- #define A_dst A1
- #define A_dst2 A2
-
- #define D_srcRowBytes D3
- #define D_dstRowBytes D4
- #define D_x D5
- #define D_y D6
-
- #define D_temp1 D0
- #define D_temp2 D1
- #define D_pixel D7
-
- ASM_BEGIN
- MOVEM.L D3-D7/A2,-(SP) /* save the registers this routine overwrites */
-
- MOVEM.L source,A0-A1 /* A0 = source, A1 = destination */
- MOVEM.L srcBytes,D3-D6 /* four longs starting at srcBytes; assumes width and height each occupy a full long on the stack -- TODO confirm for the calling convention in use */
-
- ANDI.W #~3,D_x /* round the width down to a multiple of 4 pixels */
-
- MOVEA.L A_dst,A_dst2
- ADDA.L D_dstRowBytes,A_dst2 /* dst2 = the destination row directly below dst */
-
- SUB.L D_x,D_srcRowBytes /* source stride = rowbytes - width */
- SUB.L D_x,D_dstRowBytes /* dest stride = 2 * (rowbytes - width), doubled on the next line */
- ADD.L D_dstRowBytes,D_dstRowBytes /* two rows in dest per loop */
- LSR.W #2,D_x /* four pixels per loop */
-
- SWAP D_y /* cleverly use hi-word for y count (was out of registers) */
-
- @NextRow:
-
- MOVE.W D_x,D_y /* using 'y' for both x/y counts */
- /* NOTE(review): a width below 4 leaves D_x at 0, so the SUBQ.W/BNE pair below would wrap -- confirm callers never pass one */
- @NextPixels:
-
- MOVE.L (A_src)+, D_temp1 /* get the four source pixels */
-
- MOVE.L D_temp1,D_temp2 /* make a copy */
-
- LSR.L #8,D_temp1 /* "space out" two … */
- LSR.W #8,D_temp1 /* … adjacent pixels */
- MOVE.L D_temp1,D_pixel /* copy "out spaced" pixels */
- LSL.L #8,D_pixel /* shift the copy over … */
- OR.L D_temp1,D_pixel /* … and recombine */
-
- MOVE.L D_pixel,(A_dst)+ /* write the first two pixels */
- MOVE.L D_pixel,(A_dst2)+ /* and write to the next row too */
-
- SWAP D_temp2 /* do next two pixels */
-
- LSR.L #8,D_temp2 /* repeat, as above */
- LSR.W #8,D_temp2
- MOVE.L D_temp2,D_pixel
- LSL.L #8,D_pixel
- OR.L D_temp2,D_pixel
-
- MOVE.L D_pixel,(A_dst)+ /* write the last two pixels */
- MOVE.L D_pixel,(A_dst2)+ /* and write to the next row too */
-
- SUBQ.W #1, D_y /* count down the x counter held in the low word */
- BNE.S @NextPixels
-
- ADDA.L D_srcRowBytes,A_src /* bump to next row */
- ADDA.L D_dstRowBytes,A_dst
- ADDA.L D_dstRowBytes,A_dst2
-
- SUB.L #0x00010000, D_y /* count down the row counter held in the hi-word; the low word is already 0 here */
- BNE.S @NextRow
-
- MOVEM.L (SP)+,D3-D7/A2 /* restore the saved registers */
- ASM_END
- }
-
- ASM_FUNC void BlitPixieDoubled16Bit(
- unsigned short *source, unsigned short *destination,
- unsigned long srcBytes, unsigned long dstBytes,
- unsigned short width, unsigned short height )
- {
- #define A_src A0
- #define A_dst A1
- #define A_dst2 A2
-
- #define D_srcRowBytes D3
- #define D_dstRowBytes D4
- #define D_x D5
- #define D_y D6
-
- #define D_temp1 D0
- #define D_temp2 D1
- #define D_pixel D7
-
- ASM_BEGIN
- MOVEM.L D3-D7/A2,-(SP)
-
- MOVEM.L source,A0-A1
- MOVEM.L srcBytes,D3-D6
-
- ANDI.W #~3,D_x
-
- MOVEA.L A_dst,A_dst2
- ADDA.L D_dstRowBytes,A_dst2
-
- SUB.L D_x,D_srcRowBytes
- SUB.L D_x,D_srcRowBytes
- SUB.L D_x,D_dstRowBytes
- SUB.L D_x,D_dstRowBytes
- ADD.L D_dstRowBytes,D_dstRowBytes /* two rows in dest per loop */
- LSR.W #1,D_x /* two pixels per loop */
-
- SWAP D_y /* cleverly use hi-word for y count (was out of registers) */
-
- @NextRow:
-
- MOVE.W D_x,D_y /* using 'y' for both x/y counts */
-
- @NextPixels:
-
- MOVE.L (A_src)+, D_temp1 /* get the two source pixels */
-
- MOVE.L D_temp1,D_temp2 /* make a copy */
-
- SWAP D_temp1
- MOVE.W D_temp1,D_pixel
- SWAP D_pixel
- MOVE.W D_temp1,D_pixel
-
- MOVE.L D_pixel,(A_dst)+ /* write the first two pixels */
- MOVE.L D_pixel,(A_dst2)+ /* and write to the next row too */
-
- SWAP D_temp2 /* do next two pixels */
-
- MOVE.W D_temp2,D_pixel
- SWAP D_pixel
- MOVE.W D_temp2,D_pixel
-
- MOVE.L D_pixel,(A_dst)+ /* write the last two pixels */
- MOVE.L D_pixel,(A_dst2)+ /* and write to the next row too */
-
- SUBQ.W #1, D_y
- BNE.S @NextPixels
-
- ADDA.L D_srcRowBytes,A_src /* bump to next row */
- ADDA.L D_dstRowBytes,A_dst
- ADDA.L D_dstRowBytes,A_dst2
-
- SUB.L #0x00010000, D_y
- BNE.S @NextRow
-
- MOVEM.L (SP)+,D3-D7/A2
- ASM_END
- }
-
- #pragma mark *** Generic C:
- #elif USE_GENERIC_C
-
// BlitPixieDoubled8Bit -- portable C version.
//
// Copies a width x height block of 8-bit pixels to the destination at twice
// the size: every source pixel is written as a 2x2 square.
//
//   source, destination   first source pixel / first destination pixel
//   srcRowBytes           byte pitch of one source row
//   dstRowBytes           byte pitch of one destination row
//   width, height         source size in pixels
//
// The destination must hold 2*height rows of at least 2*width pixels.
void BlitPixieDoubled8Bit(
    unsigned char *source, unsigned char *destination,
    unsigned long srcRowBytes, unsigned long dstRowBytes,
    unsigned short width, unsigned short height)
{
    // unsigned long counters: the doubled x index reaches 2*width, which
    // does not fit in an int on targets where int is 16 bits wide
    unsigned long x, y;

    for ( y = 0; y < height; y++ )
    {
        // row addresses are computed once per row instead of once per pixel
        const unsigned char *srcRow = source + y * srcRowBytes;
        unsigned char *upperRow = destination + ( 2 * y ) * dstRowBytes;
        unsigned char *lowerRow = upperRow + dstRowBytes;

        for ( x = 0; x < width; x++ )
        {
            unsigned char c = srcRow[ x ];

            upperRow[ 2 * x ] = c;
            upperRow[ 2 * x + 1 ] = c;
            lowerRow[ 2 * x ] = c;
            lowerRow[ 2 * x + 1 ] = c;
        }
    }
}
-
// BlitPixieDoubled16Bit -- portable C version.
//
// Copies a width x height block of 16-bit pixels to the destination at twice
// the size: every source pixel is written as a 2x2 square.
//
//   source, destination   first source pixel / first destination pixel
//   srcRowBytes           byte pitch of one source row
//   dstRowBytes           byte pitch of one destination row
//   width, height         source size in pixels
//
// The destination must hold 2*height rows of at least 2*width pixels.
void BlitPixieDoubled16Bit(
    unsigned short *source, unsigned short *destination,
    unsigned long srcRowBytes, unsigned long dstRowBytes,
    unsigned short width, unsigned short height)
{
    unsigned long x, y;

    for ( y = 0; y < height; y++ )
    {
        // rowBytes count bytes, not pixels, so rows are located with byte
        // arithmetic and only then viewed as 16-bit pixels
        const unsigned short *srcRow =
            (const unsigned short *)( (const unsigned char *)source + y * srcRowBytes );
        unsigned short *upperRow =
            (unsigned short *)( (unsigned char *)destination + ( 2 * y ) * dstRowBytes );
        unsigned short *lowerRow =
            (unsigned short *)( (unsigned char *)upperRow + dstRowBytes );

        for ( x = 0; x < width; x++ )
        {
            unsigned short c = srcRow[ x ];

            upperRow[ 2 * x ] = c;
            upperRow[ 2 * x + 1 ] = c;
            lowerRow[ 2 * x ] = c;
            lowerRow[ 2 * x + 1 ] = c;
        }
    }
}
-
- #endif
-
- #pragma mark -
-
- #ifndef GENERATINGASM // do not include for asm file generation
-
// BlitPixieDoubled32Bit -- portable C version (used on every target).
//
// Copies a width x height block of pixels stored as unsigned long to the
// destination at twice the size: every source pixel is written as a 2x2 square.
//
//   source, destination   first source pixel / first destination pixel
//   srcRowBytes           byte pitch of one source row
//   dstRowBytes           byte pitch of one destination row
//   width, height         source size in pixels
//
// The destination must hold 2*height rows of at least 2*width pixels.
void BlitPixieDoubled32Bit(
    unsigned long *source, unsigned long *destination,
    unsigned long srcRowBytes, unsigned long dstRowBytes,
    unsigned short width, unsigned short height)
{
    unsigned long x, y;

    for ( y = 0; y < height; y++ )
    {
        // rowBytes count bytes, not pixels, so rows are located with byte
        // arithmetic and only then viewed as whole pixels
        const unsigned long *srcRow =
            (const unsigned long *)( (const unsigned char *)source + y * srcRowBytes );
        unsigned long *upperRow =
            (unsigned long *)( (unsigned char *)destination + ( 2 * y ) * dstRowBytes );
        unsigned long *lowerRow =
            (unsigned long *)( (unsigned char *)upperRow + dstRowBytes );

        for ( x = 0; x < width; x++ )
        {
            unsigned long c = srcRow[ x ];

            upperRow[ 2 * x ] = c;
            upperRow[ 2 * x + 1 ] = c;
            lowerRow[ 2 * x ] = c;
            lowerRow[ 2 * x + 1 ] = c;
        }
    }
}
-
- #endif
-
-